{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the topic spreadsheet (looked up relative to the working directory).\n",
    "source_workbook = \"150-20-topics-data.xlsx\"\n",
    "topicdata = pd.read_excel(source_workbook)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rank topics from heaviest to lightest; row labels are left untouched.\n",
    "topicdata = topicdata.sort_values('weight', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     index   weight                                              words  \\\n",
      "7        7  0.03779  mom dad daughter year son man wife woman girl ...   \n",
      "142    142  0.02517  dog cat man dogs shelter home woman family pit...   \n",
      "29      29  0.02465  trump million pay tax money billion workers cu...   \n",
      "54      54  0.02026  kids children parents people things don child ...   \n",
      "70      70  0.02025  don people things didn doesn make stop wrong g...   \n",
      "\n",
      "                              topic title                category subcategory  \\\n",
      "7                            family viral    family/relationships         NaN   \n",
      "142  animal shelter adoptions and rescues  uplifting/heartwarming         NaN   \n",
      "29                       trump and ecnomy                politics        jobs   \n",
      "54                              parenting    family/relationships   parenting   \n",
      "70                                   misc                    misc         NaN   \n",
      "\n",
      "    second category second subcategory  \n",
      "7               NaN                NaN  \n",
      "142         animals                NaN  \n",
      "29              NaN                NaN  \n",
      "54              NaN                NaN  \n",
      "70              NaN                NaN  \n"
     ]
    }
   ],
   "source": [
    "# Plain-text preview of the first five rows of the ranked table.\n",
    "preview = topicdata.head(5)\n",
    "print(preview)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reorder the columns so the topic title comes right after the index.\n",
    "column_order = [\n",
    "    \"index\",\n",
    "    \"topic title\",\n",
    "    \"weight\",\n",
    "    \"words\",\n",
    "    \"category\",\n",
    "    \"subcategory\",\n",
    "    \"second category\",\n",
    "    \"second subcategory\",\n",
    "]\n",
    "topicdata = topicdata[column_order]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     index                           topic title   weight  \\\n",
      "7        7                          family viral  0.03779   \n",
      "142    142  animal shelter adoptions and rescues  0.02517   \n",
      "29      29                      trump and ecnomy  0.02465   \n",
      "54      54                             parenting  0.02026   \n",
      "70      70                                  misc  0.02025   \n",
      "\n",
      "                                                 words  \\\n",
      "7    mom dad daughter year son man wife woman girl ...   \n",
      "142  dog cat man dogs shelter home woman family pit...   \n",
      "29   trump million pay tax money billion workers cu...   \n",
      "54   kids children parents people things don child ...   \n",
      "70   don people things didn doesn make stop wrong g...   \n",
      "\n",
      "                   category subcategory second category second subcategory  \n",
      "7      family/relationships         NaN             NaN                NaN  \n",
      "142  uplifting/heartwarming         NaN         animals                NaN  \n",
      "29                 politics        jobs             NaN                NaN  \n",
      "54     family/relationships   parenting             NaN                NaN  \n",
      "70                     misc         NaN             NaN                NaN  \n"
     ]
    }
   ],
   "source": [
    "# Plain-text preview of the first five rows to check the column layout.\n",
    "preview = topicdata.head(5)\n",
    "print(preview)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the ranked table as CSV in the current user's home directory.\n",
    "# expanduser(\"~\") replaces the hard-coded /Users/tessmcnulty prefix so the\n",
    "# notebook also runs under other accounts and machines.\n",
    "# NOTE(review): presumably resolves to the same file for the original\n",
    "# author -- confirm.\n",
    "docname = os.path.expanduser(\"~/topicdataranked.csv\")\n",
    "topicdata.to_csv(docname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct primary categories, in order of first appearance in topicdata.\n",
    "category_column = topicdata[\"category\"]\n",
    "categories = category_column.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "family/relationships\n",
      "0.1149\n",
      "uplifting/heartwarming\n",
      "0.05202\n",
      "politics\n",
      "0.3535\n",
      "misc\n",
      "0.02635\n",
      "entertainment\n",
      "0.17256\n",
      "unfortunate event\n",
      "0.0503\n",
      "homemaking/diy\n",
      "0.02972\n",
      "race and identity/controversy\n",
      "0.06621\n",
      "animals\n",
      "0.04072\n",
      "nature\n",
      "0.01624\n",
      "travel\n",
      "0.02017\n",
      "science\n",
      "0.04141\n",
      "funny\n",
      "0.01121\n",
      "food\n",
      "0.04634\n",
      "art/craft\n",
      "0.03285\n",
      "tech/social media\n",
      "0.01835\n",
      "beauty\n",
      "0.02237\n",
      "inspiration/advice\n",
      "0.01509\n",
      "religious\n",
      "0.01987\n",
      "health and fitness\n",
      "0.01228\n",
      "quiz\n",
      "0.01295\n",
      "products\n",
      "0.00691\n",
      "disasters\n",
      "0.00495\n",
      "spanish\n",
      "0.00625\n",
      "arabic\n",
      "0.00036\n"
     ]
    }
   ],
   "source": [
    "# Print each primary category followed by the summed weight of its topics.\n",
    "# Rows are selected with a boolean mask instead of looping over range(0, 150),\n",
    "# so the cell no longer assumes exactly 150 rows labelled 0..149 (fewer rows\n",
    "# raised KeyError, extra rows were silently ignored).\n",
    "for category in categories:\n",
    "    in_category = topicdata['category'] == category\n",
    "    count = topicdata.loc[in_category, 'weight'].sum()\n",
    "    print(category)\n",
    "    print(count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "arabic\n"
     ]
    }
   ],
   "source": [
    "# Print the title of every topic whose primary category is \"arabic\".\n",
    "# A boolean mask replaces the hard-coded range(0, 150), so the lookup works\n",
    "# for any number of rows; sort_index() keeps the ascending row-label order\n",
    "# that the old label-by-label loop produced.\n",
    "arabic_titles = topicdata.loc[topicdata['category'] == \"arabic\", 'topic title']\n",
    "for title in arabic_titles.sort_index():\n",
    "    print(title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:root] *",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
